Prepare libraies

library(data.table)
library(tidyverse)
library(rtracklayer)
library(DESeq2)
library(Biostrings)
library(genomation)
library(RColorBrewer)
library(CLIPanalyze)

Calculate scaleFactor for bamCoverage

bamCoverage from Deeptools was used to generated scaled bw files for plotting peak heatmaps

Scalefactors are calculated as the reciprical of DESeq2 estimated sizeFactors using counnts in genes outside of peaks.

dir.create("PDF_figure/Peak_heatmap_merged", showWarnings = FALSE)

load('../Merged_Analysis/merged_peak_analysis.rda')

sizefactor <- sizeFactors(dds.peaks.hva.hvak)

scalefactor <- (1/sizefactor)/max((1/sizefactor))

scalefactor
##      HVA1      HVA2      HVA3      HVA4      HVA5      HVA6     HVAK1     HVAK2 
## 0.1080152 0.4181528 0.2468695 0.1090049 1.0000000 0.5320503 0.1065014 0.1526190 
##     HVAK3     HVAK4     HVAK5     HVAK6 
## 0.5110165 0.1886995 0.1138450 0.1566501

bw file generation was done on cluster.

Load miR-peaks dataset

mirs.peaks <- readRDS("Datafiles/miRNA-merged-peaks-list-09282019.rds")

Analysis

Prepare bw file for visualization

# combine bigwig files of the same genotype
$ cd ../Rescaled analysis/HVA
$ bigWigMerge *.bw HVA_merged.bedGraph

$ cd ../Rescaled analysis/HVAK
$ bigWigMerge *.bw HVAK_merged.bedGraph

convert chromosome name from Ensembl to UCSC format.

chromosomename <- read.table("../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/Ensembl.mm10.chrom.sizes.txt", sep = "\t")[,1][1:22]
hva_bed <- read.table("../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVA/HVA_merged.bedGraph", sep = "\t")
hva_bed <- hva_bed %>% filter(V1 %in% chromosomename)
hva_bed$V1 <- paste0("chr", hva_bed$V1)
hva_bed$V1[hva_bed$V1 == "chrMT"] <- "chrM"
write_delim(hva_bed, file = "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVA_merged_edited.bedGraph", delim = "\t", col_names = F)

hvak_bed <- read.table("../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVAK/HVAK_merged.bedGraph", sep = "\t")
hvak_bed <- hvak_bed %>% filter(V1 %in% chromosomename)
hvak_bed$V1 <- paste0("chr", hvak_bed$V1)
hvak_bed$V1[hvak_bed$V1 == "chrMT"] <- "chrM"
write_delim(hvak_bed, file = "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVAK_merged_edited.bedGraph", delim = "\t", col_names = F)
# bedGraph to bigWig conversion
$ awk 'NR!=1' HVA_merged_edited.bedGraph > input.HVA_merged.bedGraph
$ sort -k1,1 -k2,2n input.HVA_merged.bedGraph > sorted.input.HVA_merged.bedGraph
$ bedGraphToBigWig sorted.input.HVA_merged.bedGraph mm10.chrom.sizes HVA_merged.bw

$ awk 'NR!=1' HVAK_merged_edited.bedGraph > input.HVAK_merged.bedGraph
$ sort -k1,1 -k2,2n input.HVAK_merged.bedGraph > sorted.input.HVAK_merged.bedGraph
$ bedGraphToBigWig sorted.input.HVAK_merged.bedGraph mm10.chrom.sizes HVAK_merged.bw
peaks <- mirs.peaks
peaks <- lapply(peaks, FUN = function(x) {x[order(x$count, decreasing = T)]})
mybw.dir <- "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor"
mybw.files <- list.files(mybw.dir, pattern = "bw$", full.names = T)
print(mybw.files)
## [1] "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVA_merged.bw" 
## [2] "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVAK_merged.bw"
cols <- brewer.pal(name = "Set2", n = 8)
reds <- brewer.pal(name = "Reds", n = 9)
blues <- brewer.pal(name = "Blues", n = 9)
mycolors <-c(blues[6], reds[6]) #HVA, HVAK
light.colors <- alpha(c(blues[3], reds[2]), 0.4)

Histogram

histogram plot function

#plot histograms
peaks_meta <- function(mypeaks = peaks, 
                       miRNA_family = "miR-451a", 
                       dispersion = "se",
                       dispersion.col = NULL,
                       coordinates = c(-400, 400), 
                       line.col = mycolors, 
                       winsorize = c(0,99),
                       title = ""){
  
  suppressWarnings(mypeaks <- lapply(mypeaks, FUN = function(x) {resize(x, width = sum(abs(coordinates)), fix="center")}))
  mypeaks <- GRangesList(mypeaks)
  mysml <- ScoreMatrixList(targets=mybw.files, window=mypeaks[[miRNA_family]], type = "bigWig", strand.aware = T)
  mysampleInfo <- data.frame(basename(mybw.files), c("HVA", "HVAK"))
  names(mysampleInfo) = c("sample", "genotype")
  names(mysml) = mysampleInfo$genotype[match(names(mysml), mysampleInfo$sample)]
  plotMeta(mysml, profile.names = c("HVA", "HVAK"), xcoords = coordinates, dispersion = dispersion, main =title, line.col = line.col, winsorize = winsorize, dispersion.col = dispersion.col)
}
len <- sapply(mirs.peaks, function(x) length(x))
mirs.peaks <- mirs.peaks[order(-len)]
mirna <- names(mirs.peaks)

for (i in 1:10) {
  peaks_meta(mypeaks = peaks, dispersion  = NULL, miRNA_family = mirna[i], title = mirna[i])
  peaks_meta(mypeaks = peaks, dispersion  = "se", miRNA_family = mirna[i], title = mirna[i],
           dispersion.col = light.colors)
}

pdf("PDF_figure/Peak_heatmap_merged//Histogram_peak.pdf",
    height = 4,
    width = 6)
for (i in 1:10) {
  peaks_meta(mypeaks = peaks, dispersion  = NULL, miRNA_family = mirna[i], title = mirna[i])
  peaks_meta(mypeaks = peaks, dispersion  = "se", miRNA_family = mirna[i], title = mirna[i],
           dispersion.col = light.colors)
}
dev.off()
## quartz_off_screen 
##                 2

Heatmap

plot heatmaps

peaks_heat <- function(mypeaks = peaks, 
                       miRNA_family = "miR-451", 
                       col = blues9, 
                       coordinates = c(-400, 400), 
                       order_rows = F, 
                       winsorize_parameters = c(1,98)){
  suppressWarnings(mypeaks <- lapply(mypeaks, FUN = function(x) {resize(x, width = sum(abs(coordinates)), fix="center")}))
  mypeaks <- GRangesList(mypeaks)
  mysml <- ScoreMatrixList(targets=mybw.files[c(1,2)], window=mypeaks[[miRNA_family]], type = "bigWig", strand.aware = T)
  mysampleInfo <- data.frame(basename(mybw.files), c("HVA", "HVAK"))
  names(mysampleInfo) = c("sample", "genotype")
  names(mysml) = paste(mysampleInfo$genotype[match(names(mysml), mysampleInfo$sample)], miRNA_family , sep = "_")
  mysml.scaled = scaleScoreMatrixList(mysml)
  #multiHeatMatrix(mysml.scaled, xcoords = coordinates, col = col)
  multiHeatMatrix(mysml, common.scale = T, xcoords = coordinates, winsorize = winsorize_parameters, col = col, order = order_rows)
}
colfunc <- colorRampPalette(c("white", "blue"))
mycols <- colfunc(128)
for (i in 1:10) {
  peaks_heat(mypeaks = peaks, miRNA_family = mirna[i], col = blues9, winsorize_parameters = c(0,99))
}

pdf("PDF_figure/Peak_heatmap_merged//Heatmap_peak.pdf",
    height = 8,
    width = 8)
for (i in 1:10) {
  peaks_heat(mypeaks = peaks, miRNA_family = mirna[i], col = blues9, winsorize_parameters = c(0,99))
}
dev.off()
## quartz_off_screen 
##                 2

SessionInfo

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] CLIPanalyze_0.0.10          GenomicAlignments_1.24.0   
##  [3] Rsamtools_2.4.0             GenomicFeatures_1.40.1     
##  [5] AnnotationDbi_1.50.3        plyr_1.8.6                 
##  [7] RColorBrewer_1.1-2          genomation_1.20.0          
##  [9] Biostrings_2.56.0           XVector_0.28.0             
## [11] DESeq2_1.28.1               SummarizedExperiment_1.18.2
## [13] DelayedArray_0.14.1         matrixStats_0.59.0         
## [15] Biobase_2.48.0              rtracklayer_1.48.0         
## [17] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [19] IRanges_2.22.2              S4Vectors_0.26.1           
## [21] BiocGenerics_0.34.0         forcats_0.5.1              
## [23] stringr_1.4.0               dplyr_1.0.6                
## [25] purrr_0.3.4                 readr_1.4.0                
## [27] tidyr_1.1.3                 tibble_3.1.2               
## [29] ggplot2_3.3.3               tidyverse_1.3.1            
## [31] data.table_1.14.0          
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-1       ellipsis_0.3.2         fs_1.5.0              
##  [4] rstudioapi_0.13        farver_2.1.0           bit64_4.0.5           
##  [7] fansi_0.5.0            lubridate_1.7.10       xml2_1.3.2            
## [10] splines_4.0.3          cachem_1.0.5           impute_1.62.0         
## [13] geneplotter_1.66.0     knitr_1.33             jsonlite_1.7.2        
## [16] seqPattern_1.20.0      broom_0.7.7            gridBase_0.4-7        
## [19] annotate_1.66.0        dbplyr_2.1.1           compiler_4.0.3        
## [22] httr_1.4.2             biosignals_0.1.0       backports_1.2.1       
## [25] assertthat_0.2.1       Matrix_1.3-4           fastmap_1.1.0         
## [28] cli_2.5.0              prettyunits_1.1.1      htmltools_0.5.1.1     
## [31] tools_4.0.3            gtable_0.3.0           glue_1.4.2            
## [34] GenomeInfoDbData_1.2.3 reshape2_1.4.4         rappdirs_0.3.3        
## [37] Rcpp_1.0.6             cellranger_1.1.0       jquerylib_0.1.4       
## [40] vctrs_0.3.8            xfun_0.23              rvest_1.0.0           
## [43] lifecycle_1.0.0        XML_3.99-0.6           zlibbioc_1.34.0       
## [46] scales_1.1.1           BSgenome_1.56.0        hms_1.1.0             
## [49] curl_4.3.1             yaml_2.2.1             memoise_2.0.0         
## [52] sass_0.4.0             biomaRt_2.44.4         stringi_1.6.2         
## [55] RSQLite_2.2.7          highr_0.9              genefilter_1.70.0     
## [58] plotrix_3.8-1          BiocParallel_1.22.0    rlang_0.4.11          
## [61] pkgconfig_2.0.3        bitops_1.0-7           evaluate_0.14         
## [64] lattice_0.20-44        bit_4.0.4              tidyselect_1.1.1      
## [67] magrittr_2.0.1         R6_2.5.0               generics_0.1.0        
## [70] DBI_1.1.1              pillar_1.6.1           haven_2.4.1           
## [73] withr_2.4.2            survival_3.2-11        RCurl_1.98-1.3        
## [76] modelr_0.1.8           crayon_1.4.1           KernSmooth_2.23-20    
## [79] utf8_1.2.1             BiocFileCache_1.12.1   rmarkdown_2.9         
## [82] progress_1.2.2         locfit_1.5-9.4         readxl_1.3.1          
## [85] blob_1.2.1             reprex_2.0.0           digest_0.6.27         
## [88] xtable_1.8-4           openssl_1.4.4          munsell_0.5.0         
## [91] bslib_0.2.5.1          askpass_1.1